From 26696a741e6e1ccb4f50721336359af9267c196e Mon Sep 17 00:00:00 2001 From: Benjamin Otte Date: Sun, 12 Jul 2020 04:20:19 +0200 Subject: [PATCH] timsort: Add change tracking to gtk_tim_sort_step() --- gtk/gtksortlistmodel.c | 2 +- gtk/gtktimsort-impl.c | 147 +++++++++++++++++----------------------- gtk/gtktimsort.c | 53 ++++++++++++--- gtk/gtktimsortprivate.h | 3 +- testsuite/gtk/timsort.c | 41 +++++++++++ 5 files changed, 150 insertions(+), 96 deletions(-) diff --git a/gtk/gtksortlistmodel.c b/gtk/gtksortlistmodel.c index 5dfc6fb4cd..52c7afebbb 100644 --- a/gtk/gtksortlistmodel.c +++ b/gtk/gtksortlistmodel.c @@ -171,7 +171,7 @@ gtk_sort_list_model_sort_cb (gpointer data) { GtkSortListModel *self = data; - if (gtk_tim_sort_step (&self->sort)) + if (gtk_tim_sort_step (&self->sort, NULL)) { guint n_items = sort_array_get_size (&self->items); g_list_model_items_changed (G_LIST_MODEL (self), 0, n_items, n_items); diff --git a/gtk/gtktimsort-impl.c b/gtk/gtktimsort-impl.c index 8878db6a41..3e60706b21 100644 --- a/gtk/gtktimsort-impl.c +++ b/gtk/gtktimsort-impl.c @@ -82,14 +82,18 @@ static void gtk_tim_sort(reverse_range) (GtkTimSort *self, * the specified array */ static gsize -gtk_tim_sort(prepare_run) (GtkTimSort *self) +gtk_tim_sort(prepare_run) (GtkTimSort *self, + GtkTimSortRun *out_change) { gsize run_hi = 1; char *cur; char *next; if (self->size <= run_hi) - return self->size; + { + gtk_tim_sort_set_change (out_change, NULL, 0); + return self->size; + } cur = INCPTR (self->base); next = INCPTR (cur); @@ -105,6 +109,7 @@ gtk_tim_sort(prepare_run) (GtkTimSort *self) next = INCPTR (next); } gtk_tim_sort(reverse_range) (self, self->base, run_hi); + gtk_tim_sort_set_change (out_change, self->base, run_hi); } else /* Ascending */ { @@ -114,6 +119,7 @@ gtk_tim_sort(prepare_run) (GtkTimSort *self) cur = next; next = INCPTR (next); } + gtk_tim_sort_set_change (out_change, NULL, 0); } return run_hi; @@ -135,13 +141,16 @@ gtk_tim_sort(prepare_run) (GtkTimSort *self) * @param start the index of the first element in the range that is * not already known to be sorted ({@code lo <= start <= hi}) */ -static void gtk_tim_sort(binary_sort) (GtkTimSort *self, - gpointer a, - gsize hi, - gsize start) +static void gtk_tim_sort(binary_sort) (GtkTimSort *self, + gpointer a, + gsize hi, + gsize start, + GtkTimSortRun *inout_change) { DEFINE_TEMP (pivot); char *startp; + char *change_min = ELEM (a, hi); + char *change_max = a; g_assert (start <= hi); @@ -186,22 +195,39 @@ static void gtk_tim_sort(binary_sort) (GtkTimSort *self, * Slide elements over to make room to make room for pivot. */ n = startp - leftp; /* The number of bytes to move */ + if (n == 0) + continue; ASSIGN (pivot, startp); memmove (INCPTR (leftp), leftp, n); /* POP: overlaps */ /* a[left] = pivot; */ ASSIGN (leftp, pivot); + + change_min = MIN (change_min, leftp); + change_max = MAX (change_max, ELEM (startp, 1)); + } + + if (change_max > (char *) a) + { + g_assert (change_min < ELEM (a, hi)); + if (inout_change && inout_change->len) + { + change_max = MAX (change_max, ELEM (inout_change->base, inout_change->len)); + change_min = MIN (change_min, (char *) inout_change->base); + } + gtk_tim_sort_set_change (inout_change, change_min, (change_max - change_min) / WIDTH); } } static gboolean -gtk_tim_sort(merge_append) (GtkTimSort *self) +gtk_tim_sort(merge_append) (GtkTimSort *self, + GtkTimSortRun *out_change) { /* Identify next run */ gsize run_len; - run_len = gtk_tim_sort(prepare_run) (self); + run_len = gtk_tim_sort(prepare_run) (self, out_change); if (run_len == 0) return FALSE; @@ -209,7 +235,7 @@ gtk_tim_sort(merge_append) (GtkTimSort *self) if (run_len < self->min_run) { gsize force = MIN (self->size, self->min_run); - gtk_tim_sort(binary_sort) (self, self->base, force, run_len); + gtk_tim_sort(binary_sort) (self, self->base, force, run_len, out_change); run_len = force; } /* Push run onto pending-run stack, and maybe merge */ @@ -218,71 +244,6 @@ gtk_tim_sort(merge_append) (GtkTimSort *self) return TRUE; } -#if 0 -static int gtk_tim_sort(timsort) (void *a, gsize nel) -{ - int err = SUCCESS; - GtkTimSort self; - gsize self->min_run; - - g_assert (a || !nel || !width); - g_assert (c); - - if (nel < 2 || !width) - return err; /* Arrays of size 0 and 1 are always sorted */ - - /* If array is small, do a "mini-TimSort" with no merges */ - if (nel < MIN_MERGE) - { - gsize initRunLen = - gtk_tim_sort(prepare_run) (a, nel, CMPARGS (c, carg)); - gtk_tim_sort(binary_sort) (self, a, nel, initRunLen, CMPARGS (c, carg)); - return err; - } - - /* - * March over the array once, left to right, finding natural runs, - * extending short natural runs to self->min_run elements, and merging runs - * to maintain stack invariant. - */ - if ((err = timsort_init (&self, a, nel, CMPARGS (c, carg)))) - return err; - - do - { - /* Identify next run */ - gsize run_len = - gtk_tim_sort(prepare_run) (a, nel, CMPARGS (c, carg)); - - /* If run is short, extend to min(self->min_run, nel) */ - if (run_len < self->min_run) - { - gsize force = nel <= self->min_run ? nel : self->min_run; - gtk_tim_sort(binary_sort) (a, force, run_len, CMPARGS (c, carg)); - run_len = force; - } - /* Push run onto pending-run stack, and maybe merge */ - gtk_tim_sort_push_run (&self, a, run_len); - if ((err = gtk_tim_sort(mergeCollapse) (&self))) - goto out; - - /* Advance to find next run */ - a = ELEM (a, run_len); - nel -= run_len; - } - while (nel != 0); - - /* Merge all remaining runs to complete sort */ - if ((err = gtk_tim_sort(merge_force_collapse) (&self))) - goto out; - - g_assert (self.pending_runs == 1); -out: - timsort_deinit (&self); - return err; -} -#endif - /* * Locates the position at which to insert the specified key into the * specified sorted range; if the range contains an element equal to key, @@ -791,8 +752,9 @@ outer: * @param i stack index of the first of the two runs to merge */ static void -gtk_tim_sort(merge_at) (GtkTimSort *self, - gsize i) +gtk_tim_sort(merge_at) (GtkTimSort *self, + gsize i, + GtkTimSortRun *out_change) { gpointer base1 = self->run[i].base; gsize len1 = self->run[i].len; @@ -813,7 +775,10 @@ gtk_tim_sort(merge_at) (GtkTimSort *self, base1 = ELEM (base1, k); len1 -= k; if (len1 == 0) - goto done; + { + gtk_tim_sort_set_change (out_change, NULL, 0); + goto done; + } /* * Find where the last element of run1 goes in run2. Subsequent elements @@ -823,7 +788,10 @@ gtk_tim_sort(merge_at) (GtkTimSort *self, ELEM (base1, len1 - 1), base2, len2, len2 - 1); if (len2 == 0) - goto done; + { + gtk_tim_sort_set_change (out_change, NULL, 0); + goto done; + } /* Merge remaining runs, using tmp array with min(len1, len2) elements */ if (len1 <= len2) @@ -832,6 +800,7 @@ gtk_tim_sort(merge_at) (GtkTimSort *self, { base1 = ELEM (self->run[i].base, self->run[i].len - self->max_merge_size); gtk_tim_sort(merge_lo) (self, base1, self->max_merge_size, base2, len2); + gtk_tim_sort_set_change (out_change, base1, self->max_merge_size + len2); self->run[i].len -= self->max_merge_size; self->run[i + 1].base = ELEM (self->run[i + 1].base, - self->max_merge_size); self->run[i + 1].len += self->max_merge_size; @@ -841,6 +810,7 @@ gtk_tim_sort(merge_at) (GtkTimSort *self, else { gtk_tim_sort(merge_lo) (self, base1, len1, base2, len2); + gtk_tim_sort_set_change (out_change, base1, len1 + len2); } } else @@ -848,6 +818,7 @@ gtk_tim_sort(merge_at) (GtkTimSort *self, if (len2 > self->max_merge_size) { gtk_tim_sort(merge_hi) (self, base1, len1, base2, self->max_merge_size); + gtk_tim_sort_set_change (out_change, base1, len1 + self->max_merge_size); self->run[i].len += self->max_merge_size; self->run[i + 1].base = ELEM (self->run[i + 1].base, self->max_merge_size); self->run[i + 1].len -= self->max_merge_size; @@ -857,6 +828,7 @@ gtk_tim_sort(merge_at) (GtkTimSort *self, else { gtk_tim_sort(merge_hi) (self, base1, len1, base2, len2); + gtk_tim_sort_set_change (out_change, base1, len1 + len2); } } @@ -893,7 +865,8 @@ done: * */ static gboolean -gtk_tim_sort(merge_collapse) (GtkTimSort *self) +gtk_tim_sort(merge_collapse) (GtkTimSort *self, + GtkTimSortRun *out_change) { GtkTimSortRun *run = self->run; gsize n; @@ -913,7 +886,7 @@ gtk_tim_sort(merge_collapse) (GtkTimSort *self) return FALSE; /* Invariant is established */ } - gtk_tim_sort(merge_at) (self, n); + gtk_tim_sort(merge_at) (self, n, out_change); return TRUE; } @@ -922,7 +895,8 @@ gtk_tim_sort(merge_collapse) (GtkTimSort *self) * called once, to complete the sort. */ static gboolean -gtk_tim_sort(merge_force_collapse) (GtkTimSort *self) +gtk_tim_sort(merge_force_collapse) (GtkTimSort *self, + GtkTimSortRun *out_change) { gsize n; @@ -932,22 +906,23 @@ gtk_tim_sort(merge_force_collapse) (GtkTimSort *self) n = self->pending_runs - 2; if (n > 0 && self->run[n - 1].len < self->run[n + 1].len) n--; - gtk_tim_sort(merge_at) (self, n); + gtk_tim_sort(merge_at) (self, n, out_change); return TRUE; } static gboolean -gtk_tim_sort(step) (GtkTimSort * self) +gtk_tim_sort(step) (GtkTimSort *self, + GtkTimSortRun *out_change) { g_assert (self); - if (gtk_tim_sort(merge_collapse) (self)) + if (gtk_tim_sort(merge_collapse) (self, out_change)) return TRUE; - if (gtk_tim_sort(merge_append) (self)) + if (gtk_tim_sort(merge_append) (self, out_change)) return TRUE; - if (gtk_tim_sort(merge_force_collapse) (self)) + if (gtk_tim_sort(merge_force_collapse) (self, out_change)) return TRUE; return FALSE; diff --git a/gtk/gtktimsort.c b/gtk/gtktimsort.c index e76c915da0..4ddb6c9245 100644 --- a/gtk/gtktimsort.c +++ b/gtk/gtktimsort.c @@ -99,7 +99,7 @@ gtk_tim_sort (gpointer base, gtk_tim_sort_init (&self, base, size, element_size, compare_func, user_data); - while (gtk_tim_sort_step (&self)); + while (gtk_tim_sort_step (&self, NULL)); gtk_tim_sort_finish (&self); } @@ -172,6 +172,18 @@ gtk_tim_sort_ensure_capacity (GtkTimSort *self, return self->tmp; } +static void +gtk_tim_sort_set_change (GtkTimSortRun *out_change, + gpointer base, + gsize len) +{ + if (out_change) + { + out_change->base = base; + out_change->len = len; + } +} + /* * gtk_tim_sort_get_runs: * @self: a #GtkTimSort @@ -269,23 +281,48 @@ gtk_tim_sort_set_max_merge_size (GtkTimSort *self, #define WIDTH (self->element_size) #include "gtktimsort-impl.c" +/* + * gtk_tim_sort_step: + * @self: a #GtkTimSort + * @out_change: (optional): Return location for changed + * area. If a change did not cause any changes (for example, + * if an already sorted array gets sorted), out_change + * will be set to %NULL and 0. + * + * Performs another step in the sorting process. If a + * step was performed, %TRUE is returned and @out_change is + * set to the smallest area that contains all changes while + * sorting. + * + * If the data is completely sorted, %FALSE will be + * returned. + * + * Returns: %TRUE if an action was performed + **/ gboolean -gtk_tim_sort_step (GtkTimSort *self) +gtk_tim_sort_step (GtkTimSort *self, + GtkTimSortRun *out_change) { + gboolean result; + g_assert (self); switch (self->element_size) { -#if 1 case 4: - return gtk_tim_sort_step_4 (self); + result = gtk_tim_sort_step_4 (self, out_change); + break; case 8: - return gtk_tim_sort_step_8 (self); + result = gtk_tim_sort_step_8 (self, out_change); + break; case 16: - return gtk_tim_sort_step_16 (self); -#endif + result = gtk_tim_sort_step_16 (self, out_change); + break; default: - return gtk_tim_sort_step_default(self); + result = gtk_tim_sort_step_default (self, out_change); + break; } + + return result; } diff --git a/gtk/gtktimsortprivate.h b/gtk/gtktimsortprivate.h index 7472453556..a864a9c936 100644 --- a/gtk/gtktimsortprivate.h +++ b/gtk/gtktimsortprivate.h @@ -108,7 +108,8 @@ void gtk_tim_sort_set_runs (GtkTimSort void gtk_tim_sort_set_max_merge_size (GtkTimSort *self, gsize max_merge_size); -gboolean gtk_tim_sort_step (GtkTimSort *self); +gboolean gtk_tim_sort_step (GtkTimSort *self, + GtkTimSortRun *out_change); void gtk_tim_sort (gpointer base, gsize size, diff --git a/testsuite/gtk/timsort.c b/testsuite/gtk/timsort.c index 9edd3d5054..bf2cfee40f 100644 --- a/testsuite/gtk/timsort.c +++ b/testsuite/gtk/timsort.c @@ -196,6 +196,46 @@ test_pointers_huge (void) g_free (a); } +static void +test_steps (void) +{ + GtkTimSortRun change; + GtkTimSort sort; + int *a, *b; + gsize i, n; + + n = g_test_rand_int_range (20 * 1000, 50 * 1000); + + a = g_new (int, n); + for (i = 0; i < n; i++) + a[i] = g_test_rand_int (); + b = g_memdup (a, sizeof (int) * n); + + gtk_tim_sort_init (&sort, a, n, sizeof (int), compare_int, NULL); + gtk_tim_sort_set_max_merge_size (&sort, g_test_rand_int_range (512, 2048)); + + while (gtk_tim_sort_step (&sort, &change)) + { + if (change.len) + { + int *a_start = change.base; + int *b_start = b + ((int *) change.base - a); + g_assert_cmpint (a_start[0], !=, b_start[0]); + g_assert_cmpint (a_start[change.len - 1], !=, b_start[change.len - 1]); + memcpy (b_start, a_start, change.len * sizeof (int)); + } + + assert_sort_equal (a, b, int, n); + } + + g_qsort_with_data (b, n, sizeof (int), compare_int, NULL); + assert_sort_equal (a, b, int, n); + + gtk_tim_sort_finish (&sort); + g_free (b); + g_free (a); +} + int main (int argc, char *argv[]) { @@ -207,6 +247,7 @@ main (int argc, char *argv[]) g_test_add_func ("/timsort/integers/huge", test_integers_huge); g_test_add_func ("/timsort/pointers", test_pointers); g_test_add_func ("/timsort/pointers/huge", test_pointers_huge); + g_test_add_func ("/timsort/steps", test_steps); return g_test_run (); } -- 2.30.2